*******************************************************************************
*   Project: The Diabetes care continuum in Venezuela						   *
*   Task: Table 1: DM BL & FU Pop						                       *
*	Author: Dina Goodman (Harvard University)      							   *
*	Code check:  				  											 
*   Date: 7 February 2021													   *
*	Last updated: 7 Nov 2021								   					*
********************************************************************************
* Path globals shared by the whole analysis
global Code    "/Users/dinagoodman/Dropbox/Harvard/Dissertation/Paper 3/Code"
global Results "/Users/dinagoodman/Dropbox/Harvard/Dissertation/Paper 3/Results"
global Data    "/Users/dinagoodman/Dropbox/Harvard/Dissertation/Paper 3/Data"

* Switch to the data folder, then read the EVESCAM follow-up workbook.
* The first row of sheet "EncuestaGeneral1" supplies the variable names and
* any data currently in memory is discarded (clear).
cd "$Data"
import excel ///
    "/Users/dinagoodman/Dropbox/4. Primary data EVESCAM FW/5.30.2021 Data Final EVESCAM FW.xlsx", ///
    sheet("EncuestaGeneral1") firstrow clear

********************************************************************************
*Data Cleaning 
********************************************************************************
* Participant identifier
rename Codigo Code

*Demographics*******************************************************************
* Age (from E1edad) plus 4-, 3- and 2-level groupings
rename E1edad Age
recode Age ///
    (20/39.999  = 1 "<40")   ///
    (40/49.9999 = 2 "40-49") ///
    (50/59.9999 = 3 "50-59") ///
    (60/97      = 4 "60+")   ///
    , gen(agecat)
* The 3-level version collapses the two youngest groups of agecat
recode agecat ///
    (1 2 = 1 "<50")   ///
    (3   = 2 "50-59") ///
    (4   = 3 "60+")   ///
    , gen(age3cat)
recode Age ///
    (20/59.999 = 1 "<60") ///
    (60/97     = 2 "60+") ///
    , gen(age2cat)

* Region: overwrite the recorded region for the listed participant codes,
* then turn the string into a labelled numeric variable
replace Region = "Nor-Oriente" if inlist(Code, "B466", "B536", "B547", "B548", "B549", "B566")
replace Region = "Capital"     if inlist(Code, "C192", "G138", "G141", "G176", "G195")
encode Region, gen(region)

* Sex: label the original 1/2 coding and derive a 0/1 female indicator
rename E2sexo sex
label define sex 1 "Male" 2 "Female"
label values sex sex
recode sex ///
    (2 = 1 "Female") ///
    (1 = 0 "Male")   ///
    , gen(female)

* Education: four levels, code 98 becomes missing
recode E1gradoacad ///
    (1   = 1 "No formal education")   ///
    (2/3 = 2 "Primary")               ///
    (4/5 = 3 "Secondary")             ///
    (6/9 = 4 "University or higher")  ///
    (98  = .)                         ///
    , gen(educat)

* Socio-economic status (original note: find GRAFAR SCALE), five levels collapsed to three
recode SES1 ///
    (1 2 = 1 "High & Med-High")            ///
    (3   = 2 "Medium")                     ///
    (4 5 = 3 "Relative & Extreme Poverty") ///
    , gen(ses)

* Locality: 0/1 urban indicator from zona
recode zona ///
    (1 = 0 "Rural") ///
    (2 = 1 "Urban") ///
    , gen(urban)

* Race: attach labels to the original coding
rename E1raza race
label define race 1 "White" 2 "Afrovenezuelan" 3 "Mixed race" 4 "Indigenous" 5 "Other"
label values race race
*Food insecurity
* Household score over items SA1-SA8.
* The -missing- option leaves the total missing when every item is missing.
* Without it rowtotal() returns 0 for such rows, and they would be recoded
* as "HH food secure" even though nothing was answered.
egen hh_fis_scale= rowtotal(SA1 SA2 SA3 SA4 SA5 SA6 SA7 SA8), missing
recode hh_fis_scale (0=0 "HH food secure") (1/3=1 "Mild HH food insecurity") (4/6=2 "Moderate HH food insecurity") (7/8=3 "Severe HH food insecurity"), gen(elsca_hh)
label variable elsca_hh "LAC Food Insecurity Score for all HH"

*Generate score for ELSCA for households with minors
* Only computed when SA9 is observed; SA* expands to every variable whose
* name starts with "SA". SA9 is non-missing on these rows, so the total is
* never built from missing values alone.
egen minor_fis_scale= rowtotal(SA*) if SA9!=.
recode minor_fis_scale (0=0 "HH food secure") (1/5=1 "Mild HH food insecurity") (6/10=2 "Moderate HH food insecurity") (11/15=3 "Severe HH food insecurity"), gen(elsca_minor)
label variable elsca_minor "LAC Food Insecurity Score for HH with minors"

*Generate score for ELSCA for households without minors
* Only computed when SA9 is missing. -missing- keeps complete non-response
* (SA1-SA8 all missing) as missing rather than as a score of 0.
egen nomin_fis_scale= rowtotal(SA1 SA2 SA3 SA4 SA5 SA6 SA7 SA8) if SA9==., missing
recode nomin_fis_scale (0=0 "HH food secure") (1/3=1 "Mild HH food insecurity") (4/6=2 "Moderate HH food insecurity") (7/8=3 "Severe HH food insecurity"), gen(elsca_nominor)
label variable elsca_nominor "LAC Food Insecurity Score for HH without minors"

*combine scores
* Each household has at most one of elsca_minor / elsca_nominor filled in
* (they are defined on SA9!=. and SA9==. respectively), so take whichever exists.
gen elsca = .
	replace elsca = 0 if elsca_nominor==0 | elsca_minor==0
	replace elsca = 1 if elsca_nominor==1 | elsca_minor==1
	replace elsca = 2 if elsca_nominor==2 | elsca_minor==2
	replace elsca = 3 if elsca_nominor==3 | elsca_minor==3
label variable elsca "LAC Food Insecurity Score"
label define elsca 0 "Food secure"  1 "Mild food insecurity" 2 "Moderate food insecurity"  3 "Severe food insecurity"
label values elsca elsca

*Clinical Indicators******************************************************
**Body Weight
rename E1Peso weight_bl
rename E2Peso weight_fu
*Percent body weight change
* Computed as (baseline - follow-up) / baseline * 100, so a positive value
* means weight was lower at follow-up than at baseline.
gen weight_percentchange = ((weight_bl-weight_fu)/weight_bl)*100
* Three categories: 0 = below -1, 1 = within [-1, 1], 2 = above 1.
gen weight_pchange_cat = .
replace weight_pchange_cat=0 if weight_percentchange < -1
replace weight_pchange_cat=1 if weight_percentchange>=-1 & weight_percentchange<=1
* Stata treats missing as larger than any number, so ">1" alone would put
* participants with no weight change available into category 2.
replace weight_pchange_cat=2 if weight_percentchange >1 & !missing(weight_percentchange)
**BMI
* Heights come from E1talla / E2talla; BMI is weight divided by height squared
rename (E1talla E2talla) (height_bl height_fu)
foreach w in bl fu {
    gen bmi_`w' = weight_`w' / ((height_`w')^2)
}

*BMI Category
* Same cut-points at both waves; values outside 10-64 are left as they are by recode
foreach w in bl fu {
    recode bmi_`w' (10/18.4999=0 "Underweight") (18.5/24.9999=1 "Normal Weight") (25/29.99999=2 "Overweight") (30/64=3 "Obesity"), gen(bmicat_`w')
}
label variable bmicat_bl "BMI Category BL"
label variable bmicat_fu "BMI Category FU"

*Overweight
* 1 when BMI is 25 or more, 0 when below 25, missing when BMI is missing
foreach w in bl fu {
    gen overweight_`w' = (bmi_`w' >= 25) if bmi_`w' != .
}

**Clinical diabetes
*Glucose measurements
* fbg_* come from the "glucbasal" columns, plg_* from the "glucpost" columns
rename E1glucbasal fbg_bl
rename E2glucbasal2 fbg_fu
rename E1glucpost plg_bl
rename E2glucpost plg_fu
*Treatment
* 0/1 indicators. At follow-up, E2ttoDM gives "prescribed" and
* E2ttoregularDM (codes 1-3 vs 4) gives the treatment indicator used later.
recode E1ttoDM (2=0 "No") (1 =1 "Yes"), gen(dmtreat_bl)
recode E2ttoDM (2=0 "No") (1 =1 "Yes"), gen(dmprescribed_fu)
recode E2ttoregularDM (1/3 = 1 "Yes") (4=0 "No"), gen(dmtreat_fu)

* Treatment type: codes are kept as they are, the recode only attaches labels
recode E1tipottoDM (1=1 "Insulin") (2 =2 "Oral drugs") (3 =3 "Both"), gen(dmtreat_type_bl)
recode E2tipottoDM (1=1 "Insulin") (2 =2 "Oral drugs") (3 =3 "Both"), gen(dmtreat_type_fu)

*Past diagnosis
*Personal history of diabetes
* NOTE: the source coding differs between waves (BL: 3 = No, 4 = missing;
* FU: 4 = No, 1-2 = Yes, 3 = GDM only, 98 = missing).
recode E1antperDM (3=0 "No") (1=1 "Yes") (2=2 "Yes, but GDM only") (4=.), gen(dmself_bl)
label variable dmself_bl "Past diabetes diagnosis at BL"
* The original rule list ended with a second rule for code 4, (4= .).
* recode applies the first matching rule, so that rule could never fire
* after (4=0 "No"); it has been removed. Results are unchanged.
recode E2antperDM (4=0 "No") (1/2=1 "Yes") (3=2 "Yes, but GDM only") (98=.), gen(dmself_fu)
label variable dmself_fu "Past diabetes diagnosis at FU"

*Set people with no previous diabetes diagnosis to missing trt
* Applied at baseline only: a reported treatment without a reported
* diagnosis (No, or GDM only) is treated as unknown.
replace dmtreat_bl=. if (dmself_bl==0 | dmself_bl==2) & dmtreat_bl==1

*Clinical diabetes - BASELINE - self
* 1 = self-reported diagnosis (dmself_bl==1), or plg_bl >= 200, or fbg_bl >= 126.
* 0 = both glucose values observed and below those cut-offs, with no
*     self-reported diagnosis (No, GDM only, or not recorded).
* Anyone matching neither rule stays missing (e.g. one glucose value missing
* and the other below its cut-off).
gen long clindia_self_bl=.
replace clindia_self_bl=1 if dmself_bl==1 | (plg_bl>=200 & plg_bl!=.) | (fbg_bl>=126 & fbg_bl!=.) 
replace clindia_self_bl=0 if plg_bl<200 & fbg_bl<126 & (dmself_bl==0 | dmself_bl==2 | dmself_bl==.)
* Safety net: no glucose value and no self-report means status is unknown
replace clindia_self_bl=. if plg_bl==. & fbg_bl==. & dmself_bl==.

*Clinical diabetes - FOLLOWUP - self
* Same rules applied to the follow-up measurements.
gen long clindia_self_fu=.
replace clindia_self_fu=1 if dmself_fu==1 | (plg_fu>=200 & plg_fu!=.) | (fbg_fu>=126 & fbg_fu!=.)
replace clindia_self_fu=0 if plg_fu<200 & fbg_fu<126 & (dmself_fu==0 | dmself_fu==2 | dmself_fu==.)
* Baseline cases are carried forward as cases at follow-up ...
replace clindia_self_fu=1 if clindia_self_bl==1
* ... unless nothing at all was collected at follow-up; because this line
* runs last, it also blanks carried-forward baseline cases with no FU data.
replace clindia_self_fu=. if plg_fu==. & fbg_fu==. & dmself_fu==.

* Value label used by the yes/no indicators in this file (unddia_*,
* diagdia_*, control_dia_*, htn_*). It was attached with -label values- but
* never defined, so those variables displayed as bare 0/1.
label define yesno 0 "No" 1 "Yes", replace

*Undiagnosed diabetes *BL
* 1 = clinical diabetes at baseline without a self-reported diagnosis
*     (dmself_bl is No or GDM only)
* 0 = self-reported diagnosis
* Everyone else stays missing.
gen long unddia_bl=.
replace unddia_bl=1 if clindia_self_bl==1 & (dmself_bl==0 | dmself_bl==2)
replace unddia_bl=0 if dmself_bl==1
label variable unddia_bl "Undiagnosed diabetes at BL"
label values unddia_bl yesno

*Undiagnosed diabetes *FU
* Same definition using the follow-up variables
gen long unddia_fu=.
replace unddia_fu=1 if clindia_self_fu==1 & (dmself_fu==0 | dmself_fu==2)
replace unddia_fu=0 if dmself_fu==1
label variable unddia_fu "Undiagnosed diabetes at fu"
label values unddia_fu yesno

*Diagnosed diabetes *BL
* Mirror image of unddia_bl: 1 when unddia_bl is 0, 0 when unddia_bl is 1,
* missing when unddia_bl is missing
gen diagdia_bl=0
replace diagdia_bl=1 if unddia_bl==0
replace diagdia_bl=. if unddia_bl==.
label variable diagdia_bl "Diagnosed diabetes BL"
label values diagdia_bl yesno

*Diagnosed diabetes *FU
gen diagdia_fu=0
replace diagdia_fu=1 if unddia_fu==0
replace diagdia_fu=. if unddia_fu==.
label variable diagdia_fu "Diagnosed diabetes FU"
label values diagdia_fu yesno

*Glycemic control - BL
* Starts at 1 for everyone and is set to 0 for baseline diabetes cases whose
* fasting glucose (fbg_bl) is 154 or higher.
* Control is judged from fasting glucose only, so it cannot be assessed when
* fbg is missing. Previously such rows kept the default of 1 ("controlled")
* unless the diabetes indicator was missing too, and the BL and FU missing
* rules differed. Both waves now use the same rule: missing fbg -> missing.
gen long control_dia_bl=1
replace control_dia_bl=0 if clindia_self_bl==1 & (fbg_bl>=154 & fbg_bl!=.)
replace control_dia_bl=. if missing(fbg_bl)
label variable control_dia_bl "Glycemic Control BL"
label values control_dia_bl yesno

*Glycemic control - FU
* Same definition using follow-up diabetes status and fbg_fu
gen long control_dia_fu=1
replace control_dia_fu=0 if clindia_self_fu==1 & (fbg_fu>=154 & fbg_fu!=.)
replace control_dia_fu=. if missing(fbg_fu)
label variable control_dia_fu "Glycemic Control FU"
label values control_dia_fu yesno

*Individual Continuum Steps
* Each step is a 0/1 indicator (never missing). Every step, including the
* follow-up ones, is conditioned on clinical diabetes AT BASELINE
* (clindia_self_bl==1); only the awareness / treatment / control inputs
* switch between the _bl and _fu versions.

***Step 1: all diabetes (baseline cases, at both time points)
gen cont_alldia_bl = (clindia_self_bl==1)
gen cont_alldia_fu = (clindia_self_bl==1)

***Step 2: aware (diagnosed)
gen cont_aware_bl = (clindia_self_bl==1 & diagdia_bl==1)
gen cont_aware_fu = (clindia_self_bl==1 & diagdia_fu==1)

***Step 3: on treatment
gen cont_trt_bl = (clindia_self_bl==1 & diagdia_bl==1 & dmtreat_bl==1)
gen cont_trt_fu = (clindia_self_bl==1 & diagdia_fu==1 & dmtreat_fu==1)

***Step 4: glycemic control
gen cont_control_bl = (clindia_self_bl==1 & diagdia_bl==1 & dmtreat_bl==1 & control_dia_bl==1)
gen cont_control_fu = (clindia_self_bl==1 & diagdia_fu==1 & dmtreat_fu==1 & control_dia_fu==1)

*Continuum as categorical variable
* Highest step reached among baseline diabetes cases (missing for everyone
* else). Each line promotes only those who reached the previous step.
label define continuum 1 "All " 2 "Aware" 3 "On treatment" 4 "Controlled"

gen cont_bl = 1 if clindia_self_bl==1
replace cont_bl = 2 if cont_bl==1 & diagdia_bl==1
replace cont_bl = 3 if cont_bl==2 & dmtreat_bl==1
replace cont_bl = 4 if cont_bl==3 & control_dia_bl==1
label values cont_bl continuum

*Continuum - follow up
* Still restricted to baseline cases; steps use the follow-up inputs
gen cont_fu = 1 if clindia_self_bl==1
replace cont_fu = 2 if cont_fu==1 & diagdia_fu==1
replace cont_fu = 3 if cont_fu==2 & dmtreat_fu==1
replace cont_fu = 4 if cont_fu==3 & control_dia_fu==1
label values cont_fu continuum

* Change in step between waves and its direction:
* 0 = moved down, 1 = no change, 2 = moved up, missing when score is missing
gen score = cont_fu - cont_bl
gen score_cat = cond(score<0, 0, cond(score==0, 1, 2)) if score!=.
//set 1 to be reference category

**Hypertension
*BP Averages
* Rename the two systolic (PAS) readings per wave, then average them row-wise
rename (E1PAS1 E1PAS2 E2PAS1 E2PAS2) (sbp1_bl sbp2_bl sbp1_fu sbp2_fu)
foreach w in bl fu {
    egen sbp_`w' = rowmean(sbp1_`w' sbp2_`w')
}
* Same for the two diastolic (PAD) readings
rename (E1PAD1 E1PAD2 E2PAD1 E2PAD2) (dbp1_bl dbp2_bl dbp1_fu dbp2_fu)
foreach w in bl fu {
    egen dbp_`w' = rowmean(dbp1_`w' dbp2_`w')
}

*BP meds
*Treatment
* E1ttoHTA is converted with destring first; code 3 is turned into missing
* and 1/2 become Yes/No. The temporary numeric copy is dropped afterwards.
destring E1ttoHTA, gen(bptrt_bl)
recode bptrt_bl (2=0 "No") (1=1 "Yes") (3=.), gen(bptreat_bl)
drop bptrt_bl
recode E2ttoHTA (2=0 "No") (1=1 "Yes"), gen(bptreat_fu)

*Personal history of HTA
recode E1antperHTA (3=0 "No") (1=1 "Yes") (2=2 "Yes, but pregnancy only") (4=.), gen(htaself_bl)
label variable htaself_bl "Past hypertension diagnosis at BL"
recode E2antperHTA (3=0 "No") (1=1 "Yes") (2=2 "Yes, but pregnancy only"), gen(htaself_fu)
label variable htaself_fu "Past hypertension diagnosis at FU"

*Calculate Hypertension
* 1 = mean SBP >= 140, or mean DBP >= 90, or on BP treatment
* 0 = both means observed and below the cut-offs, and not on treatment
*     (treatment No or not recorded)
* Anything else stays missing.
foreach w in bl fu {
    gen htn_`w' = 1 if (sbp_`w'>=140 & sbp_`w'!=.) | (dbp_`w'>=90 & dbp_`w'!=.) | bptreat_`w'==1
    replace htn_`w' = 0 if sbp_`w'<140 & dbp_`w'<90 & (bptreat_`w'==0 | bptreat_`w'==.)
    label values htn_`w' yesno
}
label variable htn_bl "Hypertension BL"
label variable htn_fu "Hypertension FU"

/*BP control 
*BL
gen control_bp_bl=0
replace control_bp_bl=1 if (SBPaverage<140 | DBPaverage<90)
replace control_bp_bl=. if SBPaverage==. & DBPaverage==.
label values control_bp_bl yesno
label variable control_bp_bl "BP Control"
*FU
gen control_bp_fu=0
replace control_bp_fu=1 if (sbp_fu<140 | dbp_fu<90)
replace control_bp_fu=. if sbp_fu==. & dbp_fu==.
label values control_bp_fu yesno
label variable control_bp_fu "BP Control"
*/

**Cholesterol
rename E1LDL ldl_bl
rename E2LDL ldl_fu

* High LDL: 1 when LDL is above 100, 0 otherwise, missing when LDL was not
* measured. Stata treats missing as larger than any number, so a plain
* "ldl > 100" test would have flagged every missing LDL as high.
gen highldl_bl = (ldl_bl > 100) if !missing(ldl_bl)

gen highldl_fu = (ldl_fu > 100) if !missing(ldl_fu)


* Reduce the dataset to the analysis variables and store it.
* The file name is relative, so it is written to the current working directory.
keep Code E1fechaeval E2fechaeval                           /// id and the two evaluation dates
     Age agecat age3cat age2cat race female educat ses urban /// demographics
     Yearsexposure region                                    ///
     fbg_* plg_* dmself_* dmtreat_* clindia_*                /// diabetes inputs and status
     unddia_* diagdia_* control_dia_* cont_*                 /// awareness, control, continuum
     score score_cat                                         ///
     htn_* highldl_*                                         /// hypertension, LDL
     bmi_* bmicat_* overweight_*                             /// anthropometrics
     weight_percentchange weight_pchange_cat

save "Paper 3_Final FU Dataset.dta", replace

***Tabulating Longitudinal Cascade!
* Output from here on is produced with the Results folder as working directory
cd "$Results"
*All diabetes
prop cont_alldia_bl, over(clindia_self_bl)
prop cont_alldia_fu, over(clindia_self_bl)
*Awareness
prop cont_aware_bl, over(cont_alldia_bl)
prop cont_aware_fu, over(clindia_self_bl)
*Treatment
prop cont_trt_bl, over(cont_alldia_bl)
prop cont_trt_fu, over(clindia_self_bl)
*Control
prop cont_control_bl, over(cont_alldia_bl)
prop cont_control_fu, over(clindia_self_bl)

*paired ttests to compare each stage from BL to FU
* Restricted to participants with clinical diabetes at baseline
ttest cont_aware_bl == cont_aware_fu if clindia_self_bl==1
ttest cont_trt_bl == cont_trt_fu if clindia_self_bl==1
ttest cont_control_bl == cont_control_fu if clindia_self_bl==1

*Numbers in each group
tab cont_bl cont_fu

* BL-by-FU cross-tabs for each stage among baseline cases. The first table
* used to repeat the treatment cross-tab, leaving awareness without one; it
* now tabulates awareness so the three tables match the three t-tests above.
tab cont_aware_bl cont_aware_fu if clindia_self_bl==1, row chi2
tab cont_trt_bl cont_trt_fu if clindia_self_bl==1, row chi2
tab cont_control_bl cont_control_fu if clindia_self_bl==1, row chi2

*Test for Goodarz
* Histograms of the baseline and follow-up evaluation dates for each region
* code 1-8, stacked per region and then combined into one tall figure.
* NOTE(review): assumes encode produced exactly 8 region codes - confirm.
* saving(..., replace) lets the script be re-run; without it Stata stops
* with "file already exists" as soon as the .gph files from an earlier run
* are present.
forvalues i = 1/8 {
	hist E1fechaeval if region== `i', saving(BL_`i', replace) title("BL Region: `i'")
	hist E2fechaeval if region== `i', saving(FU_`i', replace) title("FU Region: `i'")
	graph combine BL_`i'.gph FU_`i'.gph, saving(`i', replace) col(1) iscale(1)
}
graph combine 1.gph 2.gph 3.gph 4.gph 5.gph 6.gph 7.gph 8.gph, saving(time_region, replace) rows(8) ycommon xsize(5) ysize(30)

*A1C switches
* Linear conversion of fasting glucose to an A1C value: (fbg + 46.7) / 28.7
gen a1c_bl = (fbg_bl + 46.7)/28.7
sum a1c_bl
gen a1c_fu = (fbg_fu + 46.7)/28.7
sum a1c_fu

*Underweight proportion
* Tabulate the BMI categories (0 = Underweight). Tabulating the continuous
* bmi_bl listed every distinct BMI value and gave no underweight share.
tab bmicat_bl

